<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta http-equiv="content-type" content="text/html; charset=utf-8">
    
<meta charset="UTF-8">
<title>Pattern Analyzer | Elasticsearch Guide [7.7] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch Guide [7.7]">
<link rel="up" href="analysis-analyzers.html" title="Built-in analyzer reference">
<link rel="prev" href="analysis-lang-analyzer.html" title="Language Analyzers">
<link rel="next" href="analysis-simple-analyzer.html" title="Simple Analyzer">
<meta name="DC.type" content="Learn/Docs/Elasticsearch/Reference/7.7">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="7.7">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <!-- Both GTM URLs carry an explicit https: scheme so the tag still resolves when the page is opened from a non-HTTP(S) origin such as file://. -->
    <script>dataLayer = [];</script>
    <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-58RLH5" title="Google Tag Manager" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <script>
      // Pushes the gtm.start event onto the named data layer, then creates an
      // async <script> for gtm.js and inserts it before the first <script>
      // element found in the document.
      (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-58RLH5');
    </script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      // Reuse window.dataLayer if it already exists; otherwise start with an empty array.
      window.dataLayer = window.dataLayer || [];
      // gtag() queues a call by pushing its raw `arguments` object onto dataLayer.
      function gtag(){dataLayer.push(arguments);}
      // Queue the 'js' entry with the current time, then the 'config' entry for this property ID.
      gtag('js', new Date());
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <script type="text/javascript">
      // Qualtrics website-feedback loader (minified vendor snippet).
      // Constructor parameters, as used by the methods below:
      //   e - numeric rate compared against 100 (appears to be a sampling percentage - confirm with vendor docs)
      //   h - mode string, checked against "v" and "r"
      //   f - name of the cookie used to persist state
      //   g - URL of the script that go() injects
      (function(){var g=function(e,h,f,g){
      // get(a): scan document.cookie for the cookie named `a`; return its value, or null if not found.
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      // set(a,c): write cookie a=c with path "/" and an expiry 6048E5 ms (7 days) from now.
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      // check(): decide whether the script should be loaded.
      //   - No cookie and e == 100: return true without writing a cookie.
      //   - No cookie and e != 100: in "v" mode e is first randomised to 0 or 100; then "h:e:0" is stored in the cookie.
      //   - The state is read as [mode, rate, counter]; rate 100 returns true, mode "v" returns false,
      //     and mode "r" returns true only when counter % floor(100/rate) is 0 (the counter is then
      //     incremented and written back). Any other mode returns true.
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      // go(): when check() returns true, create a <script> with src g and append it to document.body (if body exists).
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      // start(): call go() on the window "load" event, using addEventListener or, failing that, attachEvent.
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      // Instantiate with rate 100 and mode "r", then start; any exception is swallowed by the empty catch.
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<strong>IMPORTANT</strong>: No additional bug fixes or documentation updates
will be released for this version. For the latest information, see the
<a href="../current/index.html">current release documentation</a>.
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch Guide [7.7]</a></span>
»
<span class="breadcrumb-link"><a href="analysis.html">Text analysis</a></span>
»
<span class="breadcrumb-link"><a href="analysis-analyzers.html">Built-in analyzer reference</a></span>
»
<span class="breadcrumb-node">Pattern Analyzer</span>
</div>
<div class="navheader">
<span class="prev">
<a href="analysis-lang-analyzer.html">« Language Analyzers</a>
</span>
<span class="next">
<a href="analysis-simple-analyzer.html">Simple Analyzer »</a>
</span>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="analysis-pattern-analyzer"></a>Pattern Analyzer<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc">edit</a>
</h2>
</div></div></div>
<p>The <code class="literal">pattern</code> analyzer uses a regular expression to split the text into terms.
The regular expression should match the <span class="strong strong"><strong>token separators</strong></span>, not the tokens
themselves. The regular expression defaults to <code class="literal">\W+</code> (that is, all non-word characters).</p>
<div class="warning admon">
<div class="icon"></div>
<div class="admon_content">
<h3>Beware of Pathological Regular Expressions</h3>
<p>The pattern analyzer uses
<a href="http://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html" class="ulink" target="_top">Java Regular Expressions</a>.</p>
<p>A badly written regular expression could run very slowly or even throw a
StackOverflowError and cause the node it is running on to exit suddenly.</p>
<p>Read more about <a href="http://www.regular-expressions.info/catastrophic.html" class="ulink" target="_top">pathological regular expressions and how to avoid them</a>.</p>
</div>
</div>
<h3>
<a id="_example_output_3"></a>Example output<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc">edit</a>
</h3>
<div class="pre_wrapper lang-console">
<pre class="programlisting prettyprint lang-console">POST _analyze
{
  "analyzer": "pattern",
  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
}</pre>
</div>
<div class="console_widget" data-snippet="snippets/847.console"></div>
<p>The above sentence would produce the following terms:</p>
<div class="pre_wrapper lang-text">
<pre class="programlisting prettyprint lang-text">[ the, 2, quick, brown, foxes, jumped, over, the, lazy, dog, s, bone ]</pre>
</div>
<h3>
<a id="_configuration_4"></a>Configuration<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc">edit</a>
</h3>
<p>The <code class="literal">pattern</code> analyzer accepts the following parameters:</p>
<div class="informaltable">
<table border="0" cellpadding="4px">
<colgroup>
<col>
<col>
</colgroup>
<tbody valign="top">
<tr>
<td valign="top">
<p>
<code class="literal">pattern</code>
</p>
</td>
<td valign="top">
<p>
A <a href="http://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html" class="ulink" target="_top">Java regular expression</a>, defaults to <code class="literal">\W+</code>.
</p>
</td>
</tr>
<tr>
<td valign="top">
<p>
<code class="literal">flags</code>
</p>
</td>
<td valign="top">
<p>
Java regular expression <a href="http://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#field.summary" class="ulink" target="_top">flags</a>.
Flags should be pipe-separated, e.g. <code class="literal">"CASE_INSENSITIVE|COMMENTS"</code>.
</p>
</td>
</tr>
<tr>
<td valign="top">
<p>
<code class="literal">lowercase</code>
</p>
</td>
<td valign="top">
<p>
Should terms be lowercased or not. Defaults to <code class="literal">true</code>.
</p>
</td>
</tr>
<tr>
<td valign="top">
<p>
<code class="literal">stopwords</code>
</p>
</td>
<td valign="top">
<p>
A pre-defined stop words list like <code class="literal">_english_</code> or an array  containing a
list of stop words.  Defaults to <code class="literal">_none_</code>.
</p>
</td>
</tr>
<tr>
<td valign="top">
<p>
<code class="literal">stopwords_path</code>
</p>
</td>
<td valign="top">
<p>
The path to a file containing stop words.
</p>
</td>
</tr>
</tbody>
</table>
</div>
<p>See the <a class="xref" href="analysis-stop-tokenfilter.html" title="Stop token filter">Stop Token Filter</a> for more information
about stop word configuration.</p>
<h3>
<a id="_example_configuration_3"></a>Example configuration<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc">edit</a>
</h3>
<p>In this example, we configure the <code class="literal">pattern</code> analyzer to split email addresses
on non-word characters or on underscores (<code class="literal">\W|_</code>), and to lower-case the result:</p>
<div class="pre_wrapper lang-console">
<pre class="programlisting prettyprint lang-console">PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_email_analyzer": {
          "type":      "pattern",
          "pattern":   "\\W|_", <a id="CO409-1"></a><i class="conum" data-value="1"></i>
          "lowercase": true
        }
      }
    }
  }
}

POST my_index/_analyze
{
  "analyzer": "my_email_analyzer",
  "text": "John_Smith@foo-bar.com"
}</pre>
</div>
<div class="console_widget" data-snippet="snippets/848.console"></div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO409-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>The backslashes in the pattern need to be escaped when specifying the
pattern as a JSON string.</p>
</td>
</tr>
</table>
</div>
<p>The above example produces the following terms:</p>
<div class="pre_wrapper lang-text">
<pre class="programlisting prettyprint lang-text">[ john, smith, foo, bar, com ]</pre>
</div>
<h4>
<a id="_camelcase_tokenizer"></a>CamelCase tokenizer<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc">edit</a>
</h4>
<p>The following more complicated example splits CamelCase text into tokens:</p>
<div class="pre_wrapper lang-console">
<pre class="programlisting prettyprint lang-console">PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "camel": {
          "type": "pattern",
          "pattern": "([^\\p{L}\\d]+)|(?&lt;=\\D)(?=\\d)|(?&lt;=\\d)(?=\\D)|(?&lt;=[\\p{L}&amp;&amp;[^\\p{Lu}]])(?=\\p{Lu})|(?&lt;=\\p{Lu})(?=\\p{Lu}[\\p{L}&amp;&amp;[^\\p{Lu}]])"
        }
      }
    }
  }
}

GET my_index/_analyze
{
  "analyzer": "camel",
  "text": "MooseX::FTPClass2_beta"
}</pre>
</div>
<div class="console_widget" data-snippet="snippets/849.console"></div>
<p>The above example produces the following terms:</p>
<div class="pre_wrapper lang-text">
<pre class="programlisting prettyprint lang-text">[ moose, x, ftp, class, 2, beta ]</pre>
</div>
<p>The regex above is easier to understand as:</p>
<div class="pre_wrapper lang-regex">
<pre class="programlisting prettyprint lang-regex">  ([^\p{L}\d]+)                 # swallow non letters and numbers,
| (?&lt;=\D)(?=\d)                 # or non-number followed by number,
| (?&lt;=\d)(?=\D)                 # or number followed by non-number,
| (?&lt;=[ \p{L} &amp;&amp; [^\p{Lu}]])    # or lower case
  (?=\p{Lu})                    #   followed by upper case,
| (?&lt;=\p{Lu})                   # or upper case
  (?=\p{Lu}                     #   followed by upper case
    [\p{L}&amp;&amp;[^\p{Lu}]]          #   then lower case
  )</pre>
</div>
<h3>
<a id="_definition_3"></a>Definition<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc">edit</a>
</h3>
<p>The <code class="literal">pattern</code> analyzer consists of:</p>
<div class="variablelist">
<dl class="variablelist">
<dt>
<span class="term">
Tokenizer
</span>
</dt>
<dd>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
<a class="xref" href="analysis-pattern-tokenizer.html" title="Pattern Tokenizer">Pattern Tokenizer</a>
</li>
</ul>
</div>
</dd>
<dt>
<span class="term">
Token Filters
</span>
</dt>
<dd>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
<a class="xref" href="analysis-lowercase-tokenfilter.html" title="Lowercase token filter">Lower Case Token Filter</a>
</li>
<li class="listitem">
<a class="xref" href="analysis-stop-tokenfilter.html" title="Stop token filter">Stop Token Filter</a> (disabled by default)
</li>
</ul>
</div>
</dd>
</dl>
</div>
<p>If you need to customize the <code class="literal">pattern</code> analyzer beyond the configuration
parameters then you need to recreate it as a <code class="literal">custom</code> analyzer and modify
it, usually by adding token filters. This would recreate the built-in
<code class="literal">pattern</code> analyzer and you can use it as a starting point for further
customization:</p>
<div class="pre_wrapper lang-console">
<pre class="programlisting prettyprint lang-console">PUT /pattern_example
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "split_on_non_word": {
          "type":       "pattern",
          "pattern":    "\\W+" <a id="CO410-1"></a><i class="conum" data-value="1"></i>
        }
      },
      "analyzer": {
        "rebuilt_pattern": {
          "tokenizer": "split_on_non_word",
          "filter": [
            "lowercase"       <a id="CO410-2"></a><i class="conum" data-value="2"></i>
          ]
        }
      }
    }
  }
}</pre>
</div>
<div class="console_widget" data-snippet="snippets/850.console"></div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO410-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>The default pattern is <code class="literal">\W+</code>, which splits on non-word characters;
this is where you’d change it.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO410-2"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>You’d add other token filters after <code class="literal">lowercase</code>.</p>
</td>
</tr>
</table>
</div>
</div>
<div class="navfooter">
<span class="prev">
<a href="analysis-lang-analyzer.html">« Language Analyzers</a>
</span>
<span class="next">
<a href="analysis-simple-analyzer.html">Simple Analyzer »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  // Set window.initial_state to an empty object; presumably read by docs.js - TODO confirm.
  window.initial_state = {}</script>
  </body>
</html>
